# !diagnostics off
# above line fixes annoying unknown column warnings

library(xtable)
library(maps)
library(mapproj)
library(interplot)
library(texreg)
library(ggplot2)
library(dplyr)
library(scales)
library(lme4)
library(lmerTest)
library(memisc)
library(effects)
library(msm)
library(lubridate)
library(fiftystater)
library(cowplot)
library(haven)
library(readr)
library(stargazer)




# define theme
theme_pew <- function(base_size = 12, base_family = "") {
	# Starts with theme_grey and then modify some parts
	theme_grey(base_size = base_size, base_family = base_family) %+replace%
		theme(
			axis.line = element_line(colour = "black"),
			axis.text         = element_text(size = rel(0.8)),
			axis.ticks        = element_line(colour = "black"),
			legend.key        = element_rect(colour = "grey80"),
			panel.background = element_blank(),
			panel.border      = element_rect(fill = NA, colour = "grey50"),
			panel.grid.major = element_blank(),
			panel.grid.minor = element_blank(),
			axis.line.x = element_line(color="black", size = .3),
			axis.line.y = element_line(color="black", size = .3),
			strip.background  = element_rect(fill = "grey80", colour = "grey50", size = 0.2)
		)
}


cbColor = c("#E69F00", "#009E73", "#0072B2")
cbColorFull <- c("#E69F00", "#009E73", "#0072B2", "#D55E00", "#CC79A7")

setwd("~/Dropbox/projects/aggregtor/replication")

###############################
## Google News Index numbers ##
###############################

""" 
Indeed, the number of articles indexed by Google News that mention probabilistic 
forecasting rose from 

907 in 2008; 
https://goo.gl/3F68Gg

to 3,860 in 2012, 
https://goo.gl/LQNzNq

to 15,500  in 2016. 
https://goo.gl/uDgLtV

[click the Tools button to see numbers. 
Note that numbers may vary slightly over time.]
"""

###########################################
### The reach of probablistic forecasts ###
###########################################

#####################
### A: Cable News ###
#####################

data<-read.csv("archive.csv",header=T,as.is=T)

avg_mentions <- sum(data$total)/98
# Average mentions per day -- 98 days in data (8/1/2016 to 11/7/16)

# Make source labels pretty for plotting
data$source <- paste(data$source, "\n(", data$total, ")", sep="")
data$source <- as.factor(data$source)
data$source <- factor(data$source,levels(data$source)[c(2,4,5,1,3,6)])

cable <- ggplot(data, aes(source,total, fill= as.factor(slant))) + 
	geom_col()+
	scale_fill_manual(values=c("#0c10d3","#d20c0c","#9a9c9e"))  +
	theme_pew() + 
	ggtitle("(A) Cable news: Discussion \n of probabilistic forecasts")+
	theme(legend.position="none") +
	theme(axis.title.y = element_text(size=10)) +
	theme(axis.text.y = element_text(size=6)) +
	theme(axis.line=element_blank(),
		  axis.ticks=element_blank(),
		  axis.text.x = element_text(size=6),
		  axis.title.x = element_blank()) +
	ylab("Mentions") + 
	annotate("text", x = 2, y = 400, label = paste(round(avg_mentions,2), "average \nmentions/day"), size = 4) 
ggsave("cable.pdf", plot=cable, width = 3, height = 3)

######################
### B: Web traffic ###
######################

load("comscoreuniquevisitors.RData")
names(comscoreunique) <- c("Source","Date","visits","total_n","Reach")

# Filter to correct date range (8/1/2016 to 11/7/16)
comscoreunique <- comscoreunique[comscoreunique$Date >= as.Date("2016/08/01"),]
comscoreunique <- comscoreunique[comscoreunique$Date <= as.Date("2016/11/07"),]
comscoreunique$Source <- as.factor(comscoreunique$Source)

# Rename "aggregators" to "probabilistic forecasters"
levels(comscoreunique$Source)[1] <- "probabilistic forecasters"

web <- ggplot(data=comscoreunique, aes(x=Date, y=Reach, color=Source)) +
	scale_color_manual(values=c("#E69F00", "#009E73", "#0072B2"))  +
	geom_smooth(se=F)  +
	geom_point(alpha=.25) +
	theme_pew() + 
	xlab("") +
	ggtitle("(B) Web traffic: probabilistic \n forecasters and media sites")+
	theme(axis.title.x = element_text(size=7)) +
	theme(axis.title.y = element_text(size=10)) +
	theme(axis.text.y = element_text(size=6)) +
	ylab("Estimated Percent of U.S. Web Traffic") +
	theme(panel.background = element_blank(),
		  legend.text=element_text(size=5),
		  legend.position = "bottom",
		  legend.title=element_blank(),
		  legend.margin=margin(t = -.5, unit='cm')) +
	scale_y_continuous(labels=scales::percent)+
	guides(colour = guide_legend(override.aes = list(size=6)))
web
ggsave("web.pdf", plot =web, width = 3, height = 3)

##################
### C: Twitter ###
##################

# Read in Twitter data, from: 
# Get+media+URLs+for+aggregators.ipynb
# See also Get+media+URLs+for+aggregators.html
# File saved as GNIPForecastExperiment.tsv.gz

dat <- read.csv("ForecastExperiment_plotdat.csv")
dat$date <- ymd(dat$date)
dat <- dat[dat$domain!='NaN',]

# Remove predictwise from data
dat <- dat[-grep(pattern = "predictwise", x = dat$domain),]

# Grep to create data for the type of data
dat$type <- NA
dat$type[grep(pattern = "forecast", dat$domain)] <- "forecast"
dat$type[grep(pattern = "polls", dat$domain)] <- "polls"

# Set date range correctly
dat <- dat[dat$date < as.Date("2016-11-8"),]
dat %>% group_by(domain) %>% summarise(sum = sum(date.1))

plotdat <- dat %>% group_by(date, type) %>% summarise(total = sum(domain.1))

# get numbers for each
sum(plotdat$total[plotdat$type=="forecast"])
sum(plotdat$total[plotdat$type=="polls"])

p2 <- ggplot(plotdat, aes(date, total, color=type )) + 
	scale_color_manual(values=c("#E69F00", "#009E73"))  +
	geom_smooth(se=F) +
	geom_point(alpha=.25) +
	scale_x_date(date_labels = "%b") + 
	xlab("") + 
	ylab("Daily Shares") +
	theme_pew() + 
	ggtitle("(C) Twitter link sharing")+
	theme(axis.title.x = element_text(size=7)) +
	theme(axis.title.y = element_text(size=10)) +
	theme(axis.text.y = element_text(size=6)) +
	theme(panel.background = element_blank(),
		  legend.title=element_text(size=6),
		  legend.text=element_text(size=6),
		  legend.position = "none") +
	coord_cartesian(ylim = c(0, 10000))

p2 <- p2 + annotate("text", x =  as.Date("2016-09-25", "%Y-%m-%d" ), y = 6500, 
					label = "major probablistic\nforecasters", 
					color = "#E69F00", size = 3)
p2 <- p2 +  annotate("text", x =  as.Date("2016-10-25", "%Y-%m-%d" ), y = 1750, 
					 label = "major poll\ncollections", 
					 color = "#009E73", size = 3)
p2
ggsave("twitter.pdf", plot =p2, width = 3, height = 3)


#################
### D: Search ###
#################

data<-read.csv("geoMap.csv",header=T,as.is=T)

data("fifty_states")

# Code state-level 2016 winner
data$winner <- 0
data$winner[(data$NYT > data$Aggregators) &  (data$NYT > data$breitbart)] <-2
data$winner[(data$breitbart > data$Aggregators) &  (data$breitbart > data$NYT)] <-1
data$region <- tolower(data$region)
us <- map_data("state")

# plot map
p <- ggplot(data, aes(map_id = tolower(region))) + 
	geom_map(aes(fill = factor(winner)), map = fifty_states, color="white", size=.05)+ 
	expand_limits(x = fifty_states$long, y = fifty_states$lat) +
	coord_map() +
	scale_x_continuous(breaks = NULL) + 
	scale_y_continuous(breaks = NULL) +
	labs(x = "", y = "") +
	ggtitle("(D) Search: Probablistic\nforecasters and media sites") +
	theme_pew() + 
	scale_fill_manual(values = c("#E69F00", "#009E73", "#0072B2" )) + 
	theme(legend.position = "none", 
		  panel.background = element_blank(),
		  panel.border = element_blank()) 

# over time data
datatrend <- read.csv("multiTimeline.csv",header=T,as.is=T)
datatrend$Day <- as.Date(datatrend$Day, "%m/%d/%Y")
datatrend$Query <- as.factor(datatrend$Query)

# correct column name
levels(datatrend$Query)[1] <- "probabilistic forecasters"

# plot time series
p2 <- ggplot(data=datatrend, aes(x=Day, y=Rank, color=Query)) +
	scale_color_manual(values=c("#E69F00", "#009E73", "#0072B2" ))  +
	geom_line()  +
	theme_pew() +
	ylab("Relative Traffic") +
	xlab("") +
	theme(axis.title.x = element_text(size=6)) +
	theme(axis.title.y = element_text(size=6)) +
	theme(axis.text.y = element_text(size=10)) +
	theme(axis.text.x = element_text(size=6)) +
	theme(panel.background = element_blank(),
		  legend.title=element_text(size=6),
		  legend.text=element_text(size=6),
		  legend.margin=margin(t = 0, unit='cm'))+
	guides(colour = guide_legend(override.aes = list(size=6)))

pp <- ggdraw() +
	draw_plot(p, -.05, .25, 1, .75) +
	draw_plot(p2, -0.04, -.05, 1.04, .4)
pp
ggsave("google.pdf", plot =pp, width = 3, height = 3)


####################################################
####################################################
### Study 1: understanding p(win) and vote share ###
####################################################
####################################################

# Wave 25 ATP data
dta <- (read_sav('ATP W25.sav'))

# load wave 21 for previous vote history
dta21 <- (read_sav('ATP W21.sav'))

# load in treatment assignments:
treat_dat <- read.csv('assingments.csv')

treat_dat$display_cond <- gsub("_text", "", treat_dat$display_cond)
dta$condition <- treat_dat$display_cond[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]
dta$est <- treat_dat$est[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]
dta$est_prob <- treat_dat$est_prob[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]

# Recode certainty to 0-1
dta$certainty<-NA
dta$certainty[which(dta$CERTWIN_W25==1)]<-1
dta$certainty[which(dta$CERTWIN_W25==2)]<-.75
dta$certainty[which(dta$CERTWIN_W25==3)]<-.5
dta$certainty[which(dta$CERTWIN_W25==4)]<-.25
dta$certainty[which(dta$CERTWIN_W25==5)]<-0

# Remove NAs for DVs
dta$likely_to_win <- dta$LIKELYWIN_1_W25
dta$likely_to_win[dta$likely_to_win > 100] <- NA
dta$percent_vote <- dta$PRCNTVT_1_W25
dta$percent_vote[dta$percent_vote > 100] <- NA

# Load in vars from wave 21
dta$chance_of_voting_16 = dta21$SCALE10_W21[match(dta$QKEY, dta21$QKEY)]
dta$chance_of_voting_16[dta$chance_of_voting_16==99] <- NA
dta$Republican = dta$PARTY_W25 ==1
dta$Democrat = dta$PARTY_W25 ==2

dta$cond3 <- as.factor(dta$condition)
levels(dta$cond3) <- c("pwin", "both", "both", "share", "share", "both", "both")
levels(dta$cond3) <- c("P(win)", "Both", "Vote share")
dta$cond3 <- factor(dta$cond3, levels = c("P(win)", "Vote share", "Both"))

xlabs = c("45%\n(0.13)", "47.5%\n(0.29)", "50%\n(0.50)", "52.5%\n(0.71)","55%\n(0.87)")
data <- dta

dta$gt50 <- dta$est > .50
dta$gt50 <- c("Less than 50", "Greater than 50")[dta$gt50 + 1]
dta$gt50 <- factor(dta$gt50, levels = c("Less than 50", "Greater than 50"))

# TABLE: Allocation of respondents to treatments in study 1
table(dta$condition)

# TABLE: Values used in study 1 treatments
treat_dat[1:10, 2:4]

####################################
### A: R judgement re VOTE SHARE ###
####################################

xbreaks = c(.45, .475, .50, .525, .55)

ggplot(dta, aes(est,percent_vote/100, group=cond3, color = cond3)) + 
	geom_smooth(
		method = "loess",
		span = 1.5,
		aes(fill=cond3),
		alpha=0.6) +
	theme_pew()  +
	scale_y_continuous(labels=scales::percent) +
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .525, y = .665, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .47, y = .56, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Reported percent of vote expected for candidate A") +
	annotate("text", x = .530, y = .641, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .535, y = .6, label = "Both", 
			 color = cbColor[3], size = 6) + 
	annotate("text", x = .52, y = .49, label = "Actual vote share", 
			 color = "#707070", size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)")+ 
	theme(legend.position="none")+
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	geom_segment(aes(x=.45, y=.45, xend=.55, yend=.55),
				 linetype="dashed",color="#707070") +
	ggtitle("Reported vote share expected (LOESS)") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor)

ggsave(filename="votea.pdf", width=6.5,height=4.5)

m<-lm(percent_vote/100~est*cond3,data=dta)
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff[dataeff$est ==.55,]
dataeff[dataeff$est ==.45,]


diff(dataeff[dataeff$est==.55,]$fit)
diff(dataeff[dataeff$est==.45,]$fit)

dataeff <- dataeff[dataeff$est==.55,]
dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2])) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	scale_y_continuous(labels=scales::percent, 
					   breaks = c(.50, .60), limits = c(.45,.67)) +
	ggtitle("Vote share (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	
	# First arrow
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .62, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nlarger\nshare",sep=""),
			 hjust = 0) +
	
	# Second arrow 
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .61, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nlarger\nshare",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .61, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nlarger\nshare",sep=""),
			 hjust = 1) +
	
	# Max Accuracy
	# annotate("segment",x=.5,xend=2.95,y=.55,yend=.55,size=0.3,
	#          arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.25, y = .54, 
			 label = "Actual vote share",
			 hjust = 0) +
	geom_hline(yintercept=c(.55),linetype="dotted",color="#707070") +
	theme(legend.position="none") 
ggsave(filename="voteb.pdf", width=3,height=4.72)

################################
### B: Likelihood of winning ###
################################

ggplot(dta, aes(est,likely_to_win/100, group=cond3, color = cond3)) + 
	geom_smooth(
		method = "loess",
		span = 1.5,
		aes(fill=cond3),
		alpha=0.6) +
	theme_pew()  +
	# scale_y_continuous(labels=scales::percent) +
	scale_y_continuous( #labels=scales::percent, 
		breaks = c(.25, .5, .75), limits = c(.10,.90)) +
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .495, y = .85, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .475, y = .62, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Reported likelihood thatcandidate A wins") +
	annotate("text", x = .52, y = .71, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .502, y = .63, label = "Both", 
			 color = cbColor[3], size = 6) + 
	annotate("text", x = .4835, y = .22, label = "Actual win probablitiy", 
			 color = "#707070", size = 6) + 
	annotate("text", x = .52, y = .46, label = "Actual vote share", 
			 color = "#707070", size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)")+ 
	theme(legend.position="none")+
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	geom_segment(aes(x=.45, y=.13, xend=.55, yend=.87),
				 linetype="dashed",color="#707070") +
	geom_segment(aes(x=.45, y=.45, xend=.55, yend=.55),
				 linetype="dashed",color="#707070") +
	ggtitle("Reported likelihood that Candidate A wins (LOESS)") +
	scale_colour_manual(values=cbColor) +
	scale_fill_manual(values=cbColor)

ggsave(filename="likelihood_loess.pdf", width=6.5,height=4.5)

m<-lm(likely_to_win/100~est*cond3,data=dta)
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff[dataeff$est ==.55,]
dataeff[dataeff$est ==.45,]


diff(dataeff[dataeff$est==.55,]$fit)
diff(dataeff[dataeff$est==.45,]$fit)

dataeff <- dataeff[dataeff$est==.55,]
dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2])) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	scale_y_continuous( #labels=scales::percent, 
		breaks = c(.25, .5, .75), limits = c(.10,.90)) +
	ggtitle("Likelihood of winning (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	
	# First arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .68, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nmore",sep=""),
			 hjust = 1) +
	
	# Second arrow 
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .58, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nmore",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .62, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nmore",sep=""),
			 hjust = 0) +
	# Max Accuracy
	annotate("text", size=3, x = 1.25, y = .86, 
			 label = "Actual win-probability",
			 hjust = 0) +
	geom_hline(yintercept=c(.87),linetype="dotted",color="#707070") +
	annotate("text", size=3, x = 1.25, y = .54, 
			 label = "Actual vote share",
			 hjust = 0) +
	geom_hline(yintercept=c(.55),linetype="dotted",color="#707070") +
	theme(legend.position="none") 
ggsave(filename="likelihood_win55.pdf", width=3,height=4.72)

####################
### C: Certainty ###
####################

ggplot(data, aes(est,certainty, group=cond3, color = cond3)) + 
	geom_smooth(method = "loess", aes(fill=cond3), span = 1.5, alpha=0.6) +
	theme_pew()  +
	coord_cartesian(ylim = c(.5,.72)) + 
	scale_y_continuous(labels=scales::percent) + 
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .525, y = .72, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .53, y = .51, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Certainty reported that candidate A will win/lose") +
	annotate("text", x = .49, y = .641, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .52, y = .58, label = "Both", 
			 color = cbColor[3], size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)")+ 
	theme(legend.position="none")+
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	ggtitle("Certainty (LOESS)")+
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) 

ggsave(filename="certaintyc.pdf", width=6.5,height=4.5)


m<-lm(certainty~est*cond3,data=data)
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff <- dataeff[dataeff$est==.55,]

dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2]),height=200) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	coord_cartesian(ylim = c(.5,.72)) + 
	scale_y_continuous(labels=scales::percent) + 
	ggtitle("Certainty (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	# First arrow
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .59, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nmore\ncertain",sep=""),
			 hjust = 0) +
	
	# Second arrow 
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .63, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nmore\ncertain",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .65, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nmore\ncertain",sep=""),
			 hjust = 1) +
	theme(legend.position="none") 

ggsave(filename="certaintyd.pdf", width=3, height=4.72)

dta$cond3 <- factor(dta$cond3, levels = c("Vote share","Both","P(win)"))

percent_vote <- lm(percent_vote/100 ~
				   	est * cond3, 
				   data = dta)
certainty <- lm(certainty ~
					cond3, 
				data = dta)
summary(percent_vote)
summary(certainty)

# Conflated vote share and prob? 
threshold = .01
dta$conflated_p_vs <- abs(dta$percent_vote/100 - dta$est_prob) < threshold
se = function(x) sd(x, na.rm = T)/sqrt(length(na.omit(x)))
dta %>% group_by(cond3) %>% 
	summarise(m = mean(conflated_p_vs, na.rm=T), se = se(conflated_p_vs))

# Would R hypothetically vote? 
dta$VOTEHYPO_W25[dta$VOTEHYPO_W25==99] <- NA
dta$vote <- dta$VOTEHYPO_W25
dta$vote <- 5 - dta$vote
summary(dta$vote)
m<-lm(I(vote==3 | vote==4) ~est*cond3,data=dta)
summary(m)
m<-lm(I(vote==3 | vote==4)~cond3,data=dta)
summary(m)
mean(dta$vote==3 | dta$vote==4, na.rm=TRUE)
dta %>% group_by(cond3) %>% summarise(
	mean(vote==3 | vote==4, na.rm=TRUE) )
# No effects but see SI.



#####################
#####################
### Study 2: Game ###
#####################
#####################

load("longform.RData")

longdata$abs_prob_dist <- abs(longdata$probability - .5)
longdata$abs_margin_dist <- abs(longdata$margin/100 - .5)

#############################
### A: Response to P(WIN) ###
#############################

longdata$trial <- as.factor(longdata$trial)
prob_margin_abs <- lme4::lmer(voted~abs_prob_dist + abs_margin_dist + trial + (1|V1), longdata)

## comparing coefficients
m_coefs <- summary(prob_margin_abs)$coefficients 
(m_coefs[3,1]-m_coefs[2,1])/sqrt(m_coefs[2,2]^2+m_coefs[3,2]^2)
library(car)
linearHypothesis(prob_margin_abs,"abs_prob_dist = abs_margin_dist",test="F")
confint(prob_margin_abs)
#abs_prob_dist   -0.2553908 -0.09026258
#abs_margin_dist -0.5000610  0.23513231


## Fixed effects model
library(lfe)
library(stargazer)

longdata$trial <- as.factor(longdata$trial)
prob_margin_abs <- felm(voted~abs_prob_dist + abs_margin_dist|trial + V1|0|V1, longdata)

# try rescaling to 01: 
longdata$abs_prob_dist01 = longdata$abs_prob_dist/max(longdata$abs_prob_dist, na.rm=T)
longdata$abs_margin_dist01 = longdata$abs_margin_dist/max(longdata$abs_margin_dist, na.rm=T)
summary(longdata$abs_margin_dist01)
summary(longdata$abs_prob_dist01)
prob_margin_abs01 <- felm(voted~abs_prob_dist01 + abs_margin_dist01|trial + V1|0|V1, longdata)

linearHypothesis(prob_margin_abs01,"abs_prob_dist01 = abs_margin_dist01",test="F")

stargazer(prob_margin_abs,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", "abs(Midpoint Distance, Vote Share)"))
stargazer(prob_margin_abs)
length(unique(longdata$V1))

# install.packages('dotwhisker')

library(dotwhisker)
library(broom)
library(dplyr)

plotdf <- summary(prob_margin_abs)$coefficients
plotdf <- as.data.frame(plotdf) %>% tibble::rownames_to_column("term") 
names(plotdf) <- c('term', 'estimate', 'std.error', 't', 'p')
dwplot(plotdf) + 
	geom_vline(xintercept=0,linetype="dotted",color="#707070") +
	xlab("coefficient") +  
	theme(legend.position="none")+
	theme_pew() + 
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	theme(legend.position="none")

plotdf <- summary(prob_margin_abs01)$coefficients
plotdf <- as.data.frame(plotdf) %>% tibble::rownames_to_column("term") 
names(plotdf) <- c('term', 'estimate', 'std.error', 't', 'p')
dwplot(plotdf) + 
	geom_vline(xintercept=0,linetype="dotted",color="#707070") +
	xlab("coefficient") +  
	theme(legend.position="none")+
	theme_pew() + 
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	theme(legend.position="none")


mean(longdata$voted)
summary(prob_margin_abs)
stargazer(prob_margin_abs,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", "abs(Midpoint Distance, Vote Share)", "Trial 2", "Trial 3","Trial 4", "Trial 5", "Intercept Average"))

Eff <- Effect(c("abs_prob_dist"), prob_margin_abs,
			  xlevels=list(abs_prob_dist=seq(0, .49, by=.01)))

preds <- as.data.frame(Eff)

a <- ggplot(preds, aes(x=abs_prob_dist, y=fit,
					   ymin=lower, ymax=upper,color=cbColor[2],fill=cbColor[2])) + 
	geom_line() +
	geom_ribbon(alpha=0.5) +
	xlab("Distance from even odds (pr = .5)\npresented to participant") +
	ylab("Proportion of respondents\nspending $1 to vote") + 
	coord_cartesian(ylim = c(.6, .8)) + 
	ggtitle("(A) Response to probabilities")+ 
	theme_pew() +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor)+ 
	theme(legend.position="none", text = element_text(size=18))

a

#################################
### B: Response to Vote Share ###
#################################

Eff <- Effect(c("abs_margin_dist"), prob_margin_abs,
			  xlevels=list(abs_margin_dist=seq(0, .10, by=.025)))
preds <- as.data.frame(Eff)

b <- ggplot(preds, aes(x=abs_margin_dist, y=fit,
					   ymin=lower, ymax=upper, color=cbColor[2],fill=cbColor[2])) + 
	# ylim(c(.625, .80)) +
	ylim(c(.5, 1)) +
	geom_line() +
	geom_ribbon(alpha=0.5) +
	xlab("Distance from even vote shate (50%)\npresented to participant") + 
	ylab("Proportion of respondents\nspending $1 to vote") + 
	coord_cartesian(ylim = c(.6, .8)) + 
	ggtitle("(B) Response to vote share") + 
	theme_pew()	+
	scale_colour_manual(values=cbColor[2])+
	scale_fill_manual(values=cbColor[2])+ 
	theme(legend.position="none", text = element_text(size=18))

###################################################
### C: Difference as function of dist from even ###
###################################################

Eff <- Effect(c("abs_prob_dist"), prob_margin_abs,
			  xlevels=list(abs_prob_dist=seq(0,.5, by=.05)))
preds <- as.data.frame(Eff)

preds_diff <- data.frame(change= numeric(0), diff= numeric(0), se= numeric(0), lower = numeric(0), upper = numeric(0))
change <- 0
for (i in 2:11){
	change <- change + .05
	diff <- preds$fit[i] - preds$fit[1]
	
	if(i ==2){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.05 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==3){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.10 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==4){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.15 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==5){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.2 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==6){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.25 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==7){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.3 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==8){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.35 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==9){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.4 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==10){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.45 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}else if(i ==11){
		se <- deltamethod(~ (x2*0 + x1 + x3 + x4 + x5 + x6 + x7) - 
						  	(x2*.5 + x1 + x3 + x4 + x5 + x6 + x7), 
						  fixef(prob_margin_abs), vcov(prob_margin_abs))
	}
	
	lower <- diff - (1.96*se)
	upper <- diff + (1.96*se)
	preds_diff <- rbind(preds_diff,cbind(change,diff, se, lower, upper))
}


margins <- read.csv("16margin.csv", header=T,as.is = T)
prob <- read.csv("16prob.csv", header=T,as.is = T)
prob <- prob[-which(prob$Source=="PredictWise"),]
prob$Source <- factor(prob$Source, 
					  levels = c("FiveThirtyEight", "New York Times", "HuffingtonPost", "Princeton"))

c <- ggplot(preds_diff, aes(change,diff*100)) +
	geom_line( color=cbColor[3],fill=cbColor[3]) +
	geom_ribbon(aes(ymin=lower*100,ymax=upper*100), alpha=0.5, color=cbColor[3],fill=cbColor[3]) +
	geom_vline(data = na.omit(prob), aes(xintercept = distance, color=Source),linetype="longdash",size=.7)  +
	xlab("Increase in reported probability from even odds") + 
	ylab("Predicted decrease in % voting") + 
	ggtitle("(C) Changes in voting by distance from even odds") + 
	scale_x_continuous(breaks = seq(-.4, 0.5, by = 0.1)) +
	guides(colour = guide_legend(override.aes = list(size=6))) +
	theme_pew() +
	scale_colour_manual(values=cbColorFull)+
	scale_fill_manual(values=cbColorFull) + 
	scale_color_discrete(name = "2016 last reported\ndistance from even\nodds by venue") +
theme( text = element_text(size=18))

(pp <- ggdraw() +
		draw_plot(a, 0, .5, .5, .5) +
		draw_plot(b, .5, .5, .5, .5) +
		draw_plot(c, 0, 0, .65, .5))

ggsave(filename="study2.pdf", plot=pp,width=11,height=9)

##############
## Numeracy ##
##############

t <- read.csv("ag.csv")

# score index
t$score <- 0

t$score[t$Q88==25] <- t$score[t$Q88==25] + 1
t$score[t$Q89==30] <- t$score[t$Q89==30] + 1
t$score[t$Q90==50] <- t$score[t$Q90==50] + 1
nt<-table(t$score)

t<-t[,c("V1","score")]
longdata <- merge(x=longdata, y=t, by.x="V1", by.y='V1')

longdata$score <- longdata$score / 3

nt[1:4] <- paste(c("0%","33%","66%","100%"), " (N=", nt[1:4],")", sep="")

longdata$score <- as.factor(longdata$score)
levels(longdata$score) <- nt[1:4]
library(stargazer)
longdata$trial <- as.factor(longdata$trial)
prob_margin_abs <- lme4::lmer(voted~abs_prob_dist*score + score + abs_margin_dist + trial + (1|V1), longdata)
mean(longdata$voted)
summary(prob_margin_abs)
stargazer(prob_margin_abs,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", "Numeracy: 33% Correct", "Numeracy: 66% Correct" ,"Numeracy: 100% Correct", "abs(Midpoint Distance, Vote Share)", "Trial 2", "Trial 3","Trial 4", "Trial 5", "abs(Midpoint Distance, Probability) X Numeracy: 33% Correct", "abs(Midpoint Distance, Probability) X Numeracy: 66% Correct", "abs(Midpoint Distance, Probability) X Numeracy: 100% Correct","Intercept Average"))



###########################
### Study 3: Group size ###
###########################


load("longdataN.rdata")

prob_margin_abs_n <- lme4::lmer(voted~abs_prob_dist*n + abs_margin_dist*n + 
									trial + prob_first + (1|gid), longdataN)
prob_margin_abs_order <- lme4::lmer(voted~abs_prob_dist*n + abs_margin_dist*n + 
										abs_prob_dist*prob_first + abs_margin_dist*prob_first + 
										trial + (1|gid), longdataN)
stargazer(prob_margin_abs_n, prob_margin_abs_order,
		  # type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", 
		  					 "Group Size (N)",
		  					 "abs(Midpoint Distance, Vote Share)", 
		  					 "Trial 2", "Trial 3","Trial 4", "Trial 5", 
		  					 "Prob. First", 
		  					 "abs(Midpoint Distance, Probability) X Group Size (N)",
		  					 "abs(Midpoint Distance, Vote Share) X Group Size (N)",
		  					 "abs(Midpoint Distance, Probability) X Prob. First",
		  					 "abs(Midpoint Distance, Vote Share) X Prob. First",
		  					 "Intercept Average"))

